Last updated: 2019-06-26
Checks: 4 1
Knit directory: ~/output_new/
This reproducible R Markdown analysis was created with workflowr (version 1.3.0). The Checks tab describes the reproducibility checks that were applied when the results were created. The Past versions tab lists the development history.
Great job! The global environment was empty. Objects defined in the global environment can affect the analysis in your R Markdown file in unknown ways. For reproduciblity it’s best to always run the code in an empty environment.
The command set.seed(12345) was run prior to running the code in the R Markdown file. Setting a seed ensures that any results that rely on randomness, e.g. subsampling or permutations, are reproducible.
Great job! Recording the operating system, R version, and package versions is critical for reproducibility.
Nice! There were no cached chunks for this analysis, so you can be confident that you successfully produced the results during this run.
Tracking code development and connecting the code version to the results is critical for reproducibility. To start using Git, open the Terminal and type git init in your project directory.
This project is not being versioned with Git. To obtain the full reproducibility benefits of using workflowr, please see ?wflow_start.
Cellbench data, test batch effect from different protocols. Using the same three celllines sequenced by sequencing methods. This represents a technical batch effect.
libraries
suppressPackageStartupMessages({
library(CellBench)
library(scater)
library(jcolors)
library(CellMixS)
library(gridExtra)
library(purrr)
library(jcolors)
library(here)
library(tidyr)
library(dplyr)
library(stringr)
library(variancePartition)
library(diffcyt)
library(ComplexHeatmap)
})
Dataset
# data
out_path <- '/home/zjanna/output_new'
code_path <- '/home/zjanna'
dataset_name <- "cellBench"
#Mixology dataset
sc_data <- load_sc_data()
Warning: `overscope_eval_next()` is deprecated as of rlang 0.2.0.
Please use `eval_tidy()` with a data mask instead.
This warning is displayed once per session.
Warning: `overscope_clean()` is deprecated as of rlang 0.2.0.
This warning is displayed once per session.
colData(sc_data[[1]])$protocol <- rep(names(sc_data)[1], ncol(sc_data[[1]]))
sce <- sc_data[[1]]
for(i in 2:length(sc_data)){
colData(sc_data[[i]])$protocol <- rep(names(sc_data)[i], ncol(sc_data[[i]]))
gene_overlap <- intersect(rownames(sce), rownames(sc_data[[i]]))
coldata_overlap <- intersect(names(colData(sce)), names(colData(sc_data[[i]])))
sc_data[[i]] <- sc_data[[i]][gene_overlap,]
colData(sc_data[[i]]) <- colData(sc_data[[i]])[, coldata_overlap]
colData(sce) <- colData(sce)[, coldata_overlap]
sce <- sce[gene_overlap,]
sce <- cbind(sce, sc_data[[i]])
}
colnames(sce) <- paste0(colnames(sce), "_", sce$protocol)
sce <- runPCA(sce, ncomponents = 20)
sce <- runTSNE(sce)
# sce <- runUMAP(sce)
#colors
cols <-c(c(jcolors('pal6'),jcolors('pal8'))[c(1,8,14,5,2:4,6,7,9:13,15:20)],jcolors('pal4'))
names(cols) <- c()
#param
MultiSample = FALSE
#variables
batch <- "protocol"
celltype <- "cell_line"
sample <- NA
table(colData(sce)[, batch])
sc_10x sc_celseq sc_dropseq
902 274 225
#contrast
cont<-list("sc_10x-sc_celseq" = c(1,-1,0),
"sc_10x-sc_dropseq" = c(1,0,-1),
"sc_celseq-sc_dropseq" = c(0,1,-1))
How much of the variance within data can be attributed to the batch effect?
expr <- as.matrix(assays(sce)$logcounts)
meta_sub <- as.data.frame(colData(sce)[, c(celltype, batch)])
form <- as.formula(paste0("~ (1|", celltype, ") + (1|", batch, ")"))
# varPart <- fitExtractVarPartModel(expr, form, meta_sub)
#
# #Sort variables (i.e. columns) by median fraction# of variance explained
# vp <- varPart
# saveRDS(vp, paste0(out_path,"/vp_", dataset_name, ".rds"))
vp <- readRDS(paste0(out_path,"/vp_", dataset_name, ".rds"))
vp_names <- rownames(vp)
vp <-vp %>% dplyr::mutate(gene= vp_names) %>% dplyr::arrange(-!! rlang::parse_expr(batch))
Warning in class(x) <- c(subclass, tibble_class): Setting class(x) to
multiple strings ("tbl_df", "tbl", ...); result will no longer be an S4
object
vp_sub <- vp[1:3] %>% set_rownames(vp$gene)
#plot
plotPercentBars( vp_sub[1:10,] )
Warning! The custom fig.path you set was ignored by workflowr.
plotVarPart( vp_sub )
Warning! The custom fig.path you set was ignored by workflowr.
Summarize variance partitioning
d_exp <- readRDS(paste0(out_path,"/d_exp_", dataset_name, ".rds"))
d_exp_ct <- readRDS(paste0(out_path,"/d_exp_ct", dataset_name, ".rds"))
#How many genes have a variance component affected by the batch variable with more than 1%
n_batch_gene <- (as_tibble(vp) %>% dplyr::filter(!! rlang::parse_expr(batch) > 0.01) %>% nrow())/dim(sce)[1]
n_batch_gene10 <- (as_tibble(vp) %>% dplyr::filter(!! rlang::parse_expr(batch) > 0.1) %>% nrow())/dim(sce)[1]
n_celltype_gene <- (as_tibble(vp) %>% dplyr::filter(!! rlang::parse_expr(celltype)> 0.01) %>% nrow())/dim(sce)[1]
n_rel <- n_batch_gene/n_celltype_gene
#Deviance modeled
n_batch_gene_mod <- length(which(d_exp > 0.01))/dim(sce)[1]
n_batch_gene10_mod <- length(which(d_exp > 0.1))/dim(sce)[1]
n_celltype_gene_mod <- length(which(d_exp_ct > 0.01))/dim(sce)[1]
n_rel_mod <- n_batch_gene_mod/n_celltype_gene_mod
#The mean percentage of the variance that is explained by the batch effect independent from the celltype
m_batch <- mean(vp[,batch])
m_celltype <- mean(vp[,celltype])
m_rel <- m_batch/m_celltype
m_batch_mod <- mean(d_exp)
m_celltype_mod <- mean(d_exp_ct)
m_rel_mod <- m_batch_mod/m_celltype_mod
#The median percentage of the variance that is explained by the batch effect independent from the celltype
me_batch <- median(vp[, batch])
me_celltype <- median(vp[, celltype])
me_rel <- me_batch/me_celltype
me_batch_mod <- median(d_exp)
me_celltype_mod <- median(d_exp_ct)
me_rel_mod <- me_batch_mod/me_celltype_mod
#Plot varinace/deviance expression
#plot varinace dev-along with expression
rownames(vp) <- vp$gene
vp <- vp[rownames(sce),]
rowData(sce)$vp_batch <- vp[,batch]
rowData(sce)$vp_ct <- vp[,celltype]
rowData(sce)$d_exp <- d_exp
rowData(sce)$d_exp_ct <- d_exp_ct
rowData(sce)$mean_expr <- rowMeans(assays(sce)$logcounts)
ggplot(as.data.frame(rowData(sce)), aes(x = mean_expr, y = vp_batch, colour = d_exp)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE)
Warning! The custom fig.path you set was ignored by workflowr.
ggplot(as.data.frame(rowData(sce)), aes(x = mean_expr, y = d_exp, colour = vp_batch)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE)
Warning! The custom fig.path you set was ignored by workflowr.
ggplot(as.data.frame(rowData(sce)), aes(x = mean_expr, y = vp_ct, colour = d_exp_ct)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE)
Warning! The custom fig.path you set was ignored by workflowr.
ggplot(as.data.frame(rowData(sce)), aes(x = mean_expr, y = d_exp_ct, colour = vp_ct)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE)
Warning! The custom fig.path you set was ignored by workflowr.
meta_tib <- as_tibble(colData(sce)) %>% group_by_at(c(batch, celltype)) %>% summarize(n = n()) %>% dplyr::mutate(cell_freq = n / sum(n))
plot_abundance <- function(cluster_var, tib, x_var){
meta_df <- as.data.frame(eval(tib))
p <-ggplot(data=meta_df, aes_string(x=x_var, y="cell_freq", fill = cluster_var)) +
geom_bar(stat="identity") + scale_fill_manual(values=cols)
p + coord_flip() + theme_minimal()
}
plot_abundance(cluster_var = celltype, tib = meta_tib, x_var = batch)
Warning! The custom fig.path you set was ignored by workflowr.
meta_tib <- as_tibble(colData(sce)) %>% group_by_at(c(batch, celltype)) %>%
summarize(n = n()) %>% spread_(batch,"n")
meta_df <- as.data.frame(eval(meta_tib))[,-1]
meta_comb<-combn(meta_df,2,simplify=FALSE)
res<-lapply(meta_comb,function(x){
cond1<-names(x)[1]
cond2<-names(x)[2]
rel_abund_diff<-mapply(function(cond1,cond2) abs(cond1-cond2)/(cond1+cond2), x[,cond1], x[,cond2])
rel_abund_diff
})
mean_rel_abund_diff<-mean(unlist(res))
min_rel_abund_diff<-min(unlist(res))
max_rel_abund_diff<-max(unlist(res))
Test for significant changes in celltype abundances. Diffcyt (skip for datasets without replicates)
if(MultiSample){
#colData
col_tib <- as_tibble(colData(sce)) %>% group_by_at(c(sample, batch)) %>% summarize()
col_dat <- data.frame("sample_id" = col_tib[,sample], "group_id" = col_tib[,batch]) %>% set_rownames(levels(as.factor(colData(sce)[,sample])))
#create desing matrix
design <- createDesignMatrix(
col_dat, cols_design = c(batch)
)
#create contrast
n_contrast <- length(levels(as.factor(colData(sce)[,batch])))
contrast <- diag(n_contrast)
#create summarizedExperiment for cluster abundance (rows = cluster, columns= samples)
cluster_counts <- as_tibble(colData(sce)) %>% group_by_at(c(sample, celltype)) %>% summarize(n=n()) %>% spread_(sample, "n")
cluster_count <- as.matrix(cluster_counts[,-1])
rownames(cluster_count) <- unlist(cluster_counts[, celltype])
#rowData
row_dat <- as_tibble(colData(sce)) %>% group_by_at(celltype) %>% summarize(n=n()) %>% set_colnames(c("cluster_id", "n_cells"))
se <- SummarizedExperiment(list("counts" = cluster_count), rowData= as.data.frame(row_dat), colData= col_dat)
#differential abundance
res_DA <- testDA_edgeR(se, design, contrast)
#summarize - number of differential abundance cluster
n_da_cluster <- length(which(rowData(res_DA)$p_adj < 0.01))
per_da_cluster <- n_da_cluster/length(levels(as.factor(colData(sce)[,celltype])))
}else{
per_da_cluster <- NA
n_da_cluster<-0
}
Do the overall counts distribution vary between batches? Clusterwise ploting?
#sample an cluster ids
sids <- levels(as.factor(colData(sce)[, batch]))
names(sids) <- sids
cids <- levels(as.factor(colData(sce)[, celltype]))
names(cids) <- cids
#mean gene expression by sample and cluster
mean_list <- lapply(sids, function(batch_var){
mean_cluster <- lapply(cids, function(cluster_var){
counts_sc <- as.matrix(logcounts(
sce[, colData(sce)[, batch] %in% batch_var & colData(sce)[, celltype] %in% cluster_var]))
})
mean_c <- mean_cluster %>% map(rowMeans) %>% bind_rows %>%
dplyr::mutate(gene=rownames(sce)) %>% gather(cluster, logcounts, levels(as.factor(colData(sce)[,celltype])))
})
mean_expr <- mean_list %>% bind_rows(.id= "sample")
ggplot(mean_expr, aes(x=logcounts, colour=sample)) + geom_density(alpha=.3) +
theme_classic() +
facet_wrap( ~ cluster, ncol = 3) +
scale_colour_manual(values = cols[c(1:3,7)]) +
scale_x_continuous(limits = c(0, 7))
Warning: Removed 491 rows containing non-finite values (stat_density).
Warning! The custom fig.path you set was ignored by workflowr.
#number of cluster with differnt batch distributions
per_dist <- 1
Mean expression protocols vs each other
#mean expression
mean_expr <- mean_list %>% bind_rows(.id = "sample" ) %>% spread(sample, logcounts)
batch_all <- levels(as.factor(colData(sce)[,batch]))
lapply(batch_all, function(batch_var){
batch_var_2 <- batch_all[-which(batch_all %in% batch_var)]
lapply(batch_var_2, function(batch_var_3){
ggplot(mean_expr, aes_string(x=batch_var, y=batch_var_3)) +
geom_point(alpha=.3, aes(color=cluster)) +
ggtitle(batch_var) + geom_abline(slope = 1) + coord_fixed() +
facet_wrap( ~ cluster, ncol = 3) +
scale_color_manual(values=cols)
})
})
[[1]]
[[1]][[1]]
Warning! The custom fig.path you set was ignored by workflowr.
[[1]][[2]]
Warning! The custom fig.path you set was ignored by workflowr.
[[2]]
[[2]][[1]]
Warning! The custom fig.path you set was ignored by workflowr.
[[2]][[2]]
Warning! The custom fig.path you set was ignored by workflowr.
[[3]]
[[3]][[1]]
Warning! The custom fig.path you set was ignored by workflowr.
[[3]][[2]]
Warning! The custom fig.path you set was ignored by workflowr.
Calculate cms
# sce <- cms(sce, group = batch, k = 120, cell_min = 10, n_dim = 10)
# saveRDS(sce, paste0(out_path, "/cms_", dataset_name, ".rds"))
sce <- readRDS(paste0(out_path, "/cms_", dataset_name, ".rds"))
# visHist(sce, n_col = 2)
# visMetric(sce, metric_var = "cms_smooth", dim_red = "UMAP")
#summarize
mean_cms <- mean(sce$cms)
n_cms_0.01 <- length(which(sce$cms < 0.01))
cluster_mean_cms <- as_tibble(colData(sce)) %>% group_by_at(celltype) %>% summarize(cms_mean = mean(cms))
var_cms <- var(cluster_mean_cms$cms_mean)
Calculate DEG
x<-sce
clust <- as.factor(colData(sce)[,celltype])
# n_cells <- table(clust,colData(sce)[,sample])
kids<-levels(clust)
names(kids) <- kids
group<-as.factor(colData(sce)[,batch])
cs <- names(cont)
names(cs) <- cs
es<- expr
ctype <- "contrast"
res_df <- function(k, tt, ct, c) {
df <- data.frame(
gene = rownames(tt), cluster_id = k, tt,
row.names = NULL, stringsAsFactors = FALSE)
df[[ct]] <- c
return(df)
}
doDE <- function(x,lfc_cutoff=log2(1.1)){
res<-lapply(kids, function (k) {
cat(k, "..", sep = "")
n <- clust==k
es_tmp <- es[,n]
grp <- group[n]
design <- model.matrix(~0+grp)
colnames(design)<-levels(group)
k1 <- rowSums(es_tmp > 0) >= .2*min(table(grp))
es_tmp <- es_tmp[k1,]
f <- lmFit(es_tmp, design)
f <- eBayes(f, trend = TRUE)
tt <- lapply(cont, function(c) {
cc<-names(c)
fc <- contrasts.fit(f, contrasts = c)
tr <- treat(fc, lfc=lfc_cutoff)
tt <- topTreat(tr, n=Inf)
res_df(k, tt, ctype,cc)
})
return(list(tt = tt, data = es_tmp))
})
# remove empty clusters
skipped <- vapply(res, is.null, logical(1))
if (any(skipped))
message(paste("Cluster(s)", dQuote(kids[skipped]), "skipped due to an",
"insufficient number of cells in at least 2 samples per group."))
res <- res[!skipped]
kids <- kids[names(res)]
# re-organize by contrast &
# do global p-value adjustment
tt <- lapply(res, "[[", "tt")
tt <- lapply(cs, function(c) map(tt, c))
# return results
data <- lapply(res, "[[", "data")
list(table = tt,
data = data,
design = design,
coef = coef)
}
res<-doDE(x,lfc_cutoff=log2(1.1))
H1975..H2228..HCC827..
saveRDS(res, file=paste0(out_path, "/limma_", dataset_name, ".rds"))
res<-readRDS(paste0(out_path, "/limma_", dataset_name, ".rds"))
# count nb. of DE genes by cluster
# vapply(res[[1]][[2]], function(x) sum(x$adj.P.Val < 0.05), numeric(1))
FilterDEGs<-function (degDF=df, filter=c(FDR=5))
{
rownames(degDF)<-degDF$gene
pval <- degDF[, grep("adj.P.Val$", colnames(degDF)), drop = FALSE]
pf <- pval <= filter["FDR"]/100
pf[is.na(pf)] <- FALSE
DEGlistUPorDOWN <- sapply(colnames(pf), function(x) rownames(pf[pf[, x, drop = FALSE], , drop = FALSE]), simplify = FALSE)
}
result<-list()
m2<-list()
for(jj in 1:length(cs)){
result[[jj]]<-sapply(res[[1]][[names(cs)[jj]]], function(x) FilterDEGs(x))
names(result[[jj]])<-kids
m2[[jj]] = make_comb_mat(result[[jj]], mode = "intersect")
}
names(result)<-names(cs)
names(m2)<-names(cs)
# saveRDS(m2, file=paste0(out_path, "/upset_", dataset_name, ".rds"))
# m2<-readRDS(paste0(out_path, "/upset_", dataset_name, ".rds"))
#
#
# lapply(m2, function(x) UpSet(x))
n_de<-lapply(res[[1]],function(y) vapply(y, function(x) sum(x$adj.P.Val < 0.05), numeric(1)))
mean_n_de<-lapply(n_de,function(x) mean(x))
mean_mean_n_de<-mean(unlist(mean_n_de))/dim(sce)[1]
min_mean_n_de<-min(unlist(mean_n_de))/dim(sce)[1]
max_mean_n_de<-max(unlist(mean_n_de))/dim(sce)[1]
n_genes_lfc1<-lapply(res[[1]],function(y) vapply(y, function(x) sum(abs(x$logFC) > 1), numeric(1)))
mean_n_genes_lfc1<-mean(unlist(n_genes_lfc1))/dim(sce)[1]
min_n_genes_lfc1<-min(unlist(n_genes_lfc1))/dim(sce)[1]
max_n_genes_lfc1<-max(unlist(n_genes_lfc1))/dim(sce)[1]
de_overlap<-lapply(result,function(x){
which.Null<-lapply(x,function(y) is.null(y))
ind<-which.Null=='FALSE'
result2<-x[ind]
de_overlap<-length(Reduce(intersect, result2))
de_overlap
})
mean_de_overlap<-mean(unlist(de_overlap))/dim(sce)[1]
min_de_overlap<-min(unlist(de_overlap))/dim(sce)[1]
max_de_overlap<-max(unlist(de_overlap))/dim(sce)[1]
unique_genes_matrix<-NULL
unique_genes<-NULL
cb<-length(names(result[[1]]))
unique_genes<-lapply(result,function(x){
for(i in 1:cb){
unique_genes[i]<-as.numeric(length(setdiff(unlist(x[i]),unlist(x[-i]))))
}
unique_genes_matrix<-cbind(unique_genes_matrix,unique_genes)
unique_genes_matrix
})
unique_genes<-Reduce('cbind', unique_genes)
colnames(unique_genes)<-names(result)
rownames(unique_genes)<-names(result[[1]])
rel_spec1<-NULL
for(i in 1:dim(unique_genes)[2]){
rel_spec<-unique_genes[,i]/de_overlap[[i]]
rel_spec1<-cbind(rel_spec1,rel_spec)
}
mean_rel_spec=mean(rel_spec1)
min_rel_spec=min(rel_spec1)
max_rel_spec=max(rel_spec1)
#percentage of batch affected genes
cond <- gsub("-.*", "", names(n_de))
cond <- c(cond, unique(gsub(".*-", "", names(n_de))))
cond <- unique(cond)
de_be_tab <- n_de %>% bind_cols()
de_be <- cond %>% map(function(x){
de_tab <- de_be_tab[, grep(x, colnames(de_be_tab))]
de_be <- rowMeans(de_tab)
}) %>% bind_cols() %>% set_colnames(cond)
n_cl <- rep(nrow(sce), ncol(de_be)) %>% set_names(colnames(de_be))
p_be <- de_be/n_cl
mean_p_be <- mean(colMeans(p_be))
min_p_be <- min(colMins(as.matrix(p_be)))
max_p_be <- max(colMaxs(as.matrix(p_be)))
#### Could also be inferred from the variance explained by the batches
p_be_mod <- n_batch_gene_mod
#logFoldchange
mean_lfc_cl <- lapply(res[[1]],function(y) vapply(y, function(x){
de_genes <- which(x$adj.P.Val < 0.01)
mean_de <- mean(abs(x[de_genes, "logFC"]))}
, numeric(1))) %>% bind_cols()
min_lfc_cl <- lapply(res[[1]],function(y) vapply(y, function(x){
de_genes <- which(x$adj.P.Val < 0.01)
min_de <- min(abs(x[de_genes, "logFC"]))}
, numeric(1))) %>% bind_cols()
max_lfc_cl <- lapply(res[[1]],function(y) vapply(y, function(x){
de_genes <- which(x$adj.P.Val < 0.01)
max_de <- max(abs(x[de_genes, "logFC"]))}
, numeric(1))) %>% bind_cols()
mean_lfc_be <- mean(colMeans(mean_lfc_cl, na.rm = TRUE))
min_lfc_be <- min(colMins(as.matrix(min_lfc_cl), na.rm = TRUE))
max_lfc_be <- max(colMaxs(as.matrix(max_lfc_cl), na.rm = TRUE))
#rel_be
var_lfc_cl <- colSds(as.matrix(mean_lfc_cl), na.rm = TRUE)
rel_be <- (var_lfc_cl + colMeans(mean_lfc_cl, na.rm = TRUE))/colMeans(mean_lfc_cl, na.rm = TRUE)
mean_rel_be <- mean(rel_be)
min_rel_be <- min(rel_be)
max_rel_be <- max(rel_be)
#p_ct
n_de_unique <- lapply(result,function(x){
de_genes <- unlist(x) %>% unique() %>% length()
}) %>% bind_cols()
rel_spec2 <- NULL
for(i in 1:length(de_overlap)){
rel_spec <- de_overlap[[i]]/n_de_unique[[i]]
rel_spec2 <- cbind(rel_spec2,rel_spec)
}
mean_p_ct <- 1 - mean(rel_spec2)
max_p_ct <- 1 - min(rel_spec2)
min_p_ct <- 1 - max(rel_spec2)
#Size? How much of the variance can be attributed to the batch effect?
size <- data.frame("batch_genes_1per" = n_batch_gene,
"batch_genes_10per" = n_batch_gene10,
"celltype_gene_1per" = n_celltype_gene,
"relative_batch_celltype" = n_rel,
"batch_genes_1per_mod" = n_batch_gene_mod,
"batch_genes_10per_mod" = n_batch_gene10_mod,
"celltype_gene_1per_mod" = n_celltype_gene_mod,
"relative_batch_celltype_mod" = n_rel_mod,
"mean_var_batch" = m_batch,
"mean_var_celltype" = m_celltype,
"median_var_batch" = me_batch,
"median_var_celltype" = me_celltype,
"mean_var_batch_mod" = m_batch_mod,
"mean_var_celltype_mod" = m_celltype_mod,
"median_var_batch_mod" = me_batch_mod,
"median_var_celltype_mod" = me_celltype_mod,
"n_cells_total" = ncol(sce),
"n_genes_total" = nrow(sce))
#Celltype-specificity? How celltype/cluster specific are batch effects? Differences in sample variation between batches?
celltype <- data.frame("DA_celltypes" = per_da_cluster,
"per_count_dist" = per_dist,
"mean_cms" = mean_cms,
'mean_rel_abund_diff' = mean_rel_abund_diff,
'min_rel_abund_diff' = min_rel_abund_diff,
'max_rel_abund_diff' = max_rel_abund_diff,
"celltype_var_cms" = var_cms,
"n_cells_cms_0.01" = n_cms_0.01)
#Gene-specificity? How do they effect genes? Single genes? All genes? Which genes?
gene <- data.frame("mean_mean_n_de_genes" = mean_mean_n_de,
"max_mean_n_de_genes" = max_mean_n_de,
"min_mean_n_de_genes" = min_mean_n_de,
"mean_n_genes_lfc1" = mean_n_genes_lfc1,
"min_n_genes_lfc1" = min_n_genes_lfc1,
"max_n_genes_lfc1" = max_n_genes_lfc1,
"mean_de_overlap" = mean_de_overlap,
"min_de_overlap" = min_de_overlap,
"max_de_overlap" = max_de_overlap,
"mean_rel_cluster_spec"= mean_rel_spec,
"min_rel_cluster_spec"= min_rel_spec,
"max_rel_cluster_spec"= max_rel_spec
)
# Cell-specificity? How cell-specific are batche effects? Are their differences in within celltype variation between batches?
sim <- data.frame("mean_p_be" = mean_p_be,
"max_p_be" = max_p_be,
"min_p_be" = min_p_be,
"p_be_mod" = p_be_mod,
"mean_lfc_be" = mean_lfc_be,
"min_lfc_be" = min_lfc_be,
"max_lfc_be" = max_lfc_be,
"mean_rel_be" = mean_rel_be,
"min_rel_be" = min_rel_be,
"max_rel_be" = max_rel_be,
"mean_p_ct"= mean_p_ct,
"min_p_ct"= min_p_ct,
"max_p_ct"= max_p_ct)
summary <- cbind(size, celltype, gene, sim) %>% set_rownames(dataset_name)
saveRDS(summary, paste0(out_path, "/summary_", dataset_name, ".rds"))
sessionInfo()
R version 3.6.0 (2019-04-26)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 16.04.5 LTS
Matrix products: default
BLAS: /usr/local/R/R-3.6.0/lib/libRblas.so
LAPACK: /usr/local/R/R-3.6.0/lib/libRlapack.so
locale:
[1] LC_CTYPE=en_CA.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_CA.UTF-8 LC_COLLATE=en_CA.UTF-8
[5] LC_MONETARY=en_CA.UTF-8 LC_MESSAGES=en_CA.UTF-8
[7] LC_PAPER=en_CA.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] grid parallel stats4 stats graphics grDevices utils
[8] datasets methods base
other attached packages:
[1] ComplexHeatmap_2.0.0 diffcyt_1.4.0
[3] variancePartition_1.14.0 scales_1.0.0
[5] foreach_1.4.4 limma_3.40.2
[7] stringr_1.4.0 dplyr_0.8.1
[9] tidyr_0.8.3 here_0.1
[11] purrr_0.3.2 gridExtra_2.3
[13] CellMixS_1.0.0 kSamples_1.2-9
[15] SuppDists_1.1-9.4 jcolors_0.0.4
[17] scater_1.12.2 ggplot2_3.2.0
[19] CellBench_1.0.0 tibble_2.1.3
[21] magrittr_1.5 SingleCellExperiment_1.6.0
[23] SummarizedExperiment_1.14.0 DelayedArray_0.10.0
[25] BiocParallel_1.18.0 matrixStats_0.54.0
[27] Biobase_2.44.0 GenomicRanges_1.36.0
[29] GenomeInfoDb_1.20.0 IRanges_2.18.1
[31] S4Vectors_0.22.0 BiocGenerics_0.30.0
loaded via a namespace (and not attached):
[1] backports_1.1.4 circlize_0.4.6
[3] workflowr_1.3.0 BiocFileCache_1.8.0
[5] plyr_1.8.4 igraph_1.2.4.1
[7] ConsensusClusterPlus_1.48.0 lazyeval_0.2.2
[9] splines_3.6.0 flowCore_1.50.0
[11] TH.data_1.0-10 digest_0.6.19
[13] htmltools_0.3.6 viridis_0.5.1
[15] gdata_2.18.0 memoise_1.1.0
[17] cluster_2.0.9 doParallel_1.0.14
[19] sandwich_2.5-1 prettyunits_1.0.2
[21] colorspace_1.4-1 blob_1.1.1
[23] rappdirs_0.3.1 rrcov_1.4-7
[25] xfun_0.8 crayon_1.3.4
[27] RCurl_1.95-4.12 graph_1.62.0
[29] lme4_1.1-21 survival_2.44-1.1
[31] zoo_1.8-5 iterators_1.0.10
[33] glue_1.3.1 gtable_0.3.0
[35] zlibbioc_1.30.0 XVector_0.24.0
[37] listarrays_0.2.0 GetoptLong_0.1.7
[39] BiocSingular_1.0.0 shape_1.4.4
[41] DEoptimR_1.0-8 mvtnorm_1.0-10
[43] DBI_1.0.0 edgeR_3.26.5
[45] Rcpp_1.0.1 viridisLite_0.3.0
[47] progress_1.2.2 clue_0.3-57
[49] bit_1.1-14 rsvd_1.0.1
[51] FlowSOM_1.16.0 tsne_0.1-3
[53] httr_1.4.0 gplots_3.0.1.1
[55] RColorBrewer_1.1-2 pkgconfig_2.0.2
[57] XML_3.98-1.20 dbplyr_1.4.0
[59] locfit_1.5-9.1 labeling_0.3
[61] tidyselect_0.2.5 rlang_0.4.0
[63] reshape2_1.4.3 munsell_0.5.0
[65] tools_3.6.0 RSQLite_2.1.1
[67] ggridges_0.5.1 evaluate_0.14
[69] yaml_2.2.0 knitr_1.23
[71] bit64_0.9-7 fs_1.3.0
[73] robustbase_0.93-4 caTools_1.17.1.2
[75] nlme_3.1-139 compiler_3.6.0
[77] pbkrtest_0.4-7 beeswarm_0.2.3
[79] curl_3.3 png_0.1-7
[81] pcaPP_1.9-73 stringi_1.4.3
[83] lattice_0.20-38 Matrix_1.2-17
[85] nloptr_1.2.1 pillar_1.4.1
[87] GlobalOptions_0.1.0 BiocNeighbors_1.2.0
[89] cowplot_0.9.4 bitops_1.0-6
[91] irlba_2.3.3 corpcor_1.6.9
[93] colorRamps_2.3 R6_2.4.0
[95] KernSmooth_2.23-15 vipor_0.4.5
[97] codetools_0.2-16 boot_1.3-22
[99] MASS_7.3-51.4 gtools_3.8.1
[101] assertthat_0.2.1 rprojroot_1.3-2
[103] rjson_0.2.20 withr_2.1.2
[105] multcomp_1.4-10 GenomeInfoDbData_1.2.1
[107] hms_0.4.2 minqa_1.2.4
[109] rmarkdown_1.12 DelayedMatrixStats_1.6.0
[111] Rtsne_0.15 git2r_0.25.2
[113] lubridate_1.7.4 ggbeeswarm_0.6.0